home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Technotools
/
Technotools (Chestnut CD-ROM)(1993).ISO
/
misc_pto
/
ctag02
/
ctag.e
< prev
Wrap
Text File
|
1991-09-12
|
68KB
|
1,996 lines
/*
EPSHeader
File: ctag.c
Author: J. Kercheval
Created: Sun, 07/14/1991 17:24:44
*/
/*
EPSRevision History
J. Kercheval Sat, 07/27/1991 22:08:04 creation
J. Kercheval Sun, 08/18/1991 20:58:13 completion of C_get_token()
J. Kercheval Wed, 08/21/1991 22:34:49 place function recognition
J. Kercheval Wed, 08/21/1991 23:11:17 add defines and macros
J. Kercheval Wed, 08/21/1991 23:54:33 add typedef and class parsing
J. Kercheval Thu, 08/22/1991 23:53:51 add global variables
J. Kercheval Thu, 08/22/1991 23:54:05 add enum, struct, union
J. Kercheval Thu, 08/22/1991 23:54:28 add globals via typedefs
J. Kercheval Sun, 08/25/1991 23:09:28 complete semantic parser
J. Kercheval Tue, 08/27/1991 23:28:34 fix bug in typedef, struct, enum and union declarations
J. Kercheval Sat, 08/31/1991 23:58:03 add prototype parsing
J. Kercheval Tue, 09/03/1991 22:28:55 move many macros to functions
J. Kercheval Tue, 09/03/1991 23:05:34 clean code and consolidate to functions
J. Kercheval Sun, 09/08/1991 13:24:53 minor bug fix in function and global variable parser
J. Kercheval Sun, 09/08/1991 21:31:06 fix bug in lexical parser
J. Kercheval Sun, 09/08/1991 23:44:46 \v is not a valid literal in Epsilon, remove it
J. Kercheval Mon, 09/09/1991 21:49:00 fix bug in function parser
J. Kercheval Mon, 09/09/1991 22:44:46 fix bug in define parser
J. Kercheval Tue, 09/10/1991 22:06:09 fix typedef parser
J. Kercheval Wed, 09/11/1991 02:04:48 add extern symbol recognition
J. Kercheval Wed, 09/11/1991 19:49:11 fix bug in function pointer variable declaration
J. Kercheval Wed, 09/11/1991 20:38:13 add support for function pointer variable declarations after first declaration
J. Kercheval Wed, 09/11/1991 21:51:37 move #directive parsing between semantic and lexical parser
J. Kercheval Thu, 09/12/1991 22:44:43 add support for #ifdef blocks to avoid unmatched parens in ToLevelZero parsing
J. Kercheval Fri, 09/13/1991 01:17:05 add when_loading() to remap def_srch_case_map[]
*/
/*
* This file implements tagging for .C, .H and .E files which contain
* standard C and C++ syntax. This file defines no new commands and
* is intended to work with the tags package included with V5.0 of Epsilon.
* There is no problem using modified tags packages providing calls are made
* to tags_suffix_???() routines in the same way Epsilon does this and that
* an output routine add_tag() is used. All that should be required is to
* compile and load this file and this module will be used transparently to
* you. If you wish to customize the types of tags output, modify the global
* variables CTagWant?????? which when TRUE allow the output of that type of
* tag and when FALSE prohibit that type of tag.
*
* This module implements tagging for union, struct, enum, typedef, #define,
* global variables, classes, prototypes and functions (all of which may be
* specifically turned on and off). The performance cost for this level of
* accuracy is not trivial. This parser knows a lot about the syntax of C and
* takes a fair amount of time. You should expect to see a file complete
* somewhere in the range of 125%-150% of the time as Lugaru's tagger for C.
* This is not only because of the detail of the tags but also the larger
* number of them. This tagger is not intended to do all of your work for
* you but is designed to be used in conjunction with the tags generator I
* have developed and is now available. This file implements the same
* lexical and semantic parser as is found in that executable. Use the
* executable in your make file for very fast and updated tags. If you have
* problems finding it, contact me and I can point the way...
*
* There is defined at the end of this module a when_loading() function which
* alters the default search case map to allow *correct* (or at least
* consistent) sorting with sort routines external to Epsilon. In particular,
* to produce the same sort order as any UNIX, VMS or HP style sort or with
* the tags generator this module is supposed to coexist with this mapping
* must be done. You should see no difference in the location of sorted
* buffers except for lines starting with ^, [, \, ] and _.
*
* This code is dedicated to the public domain with the caveat that Lugaru is
* welcome to use this within their distribution source code which is
* supplied with Epsilon.
*
* Good Tagging,
*
* jbk@wrq.com
*
* John Kercheval
* 127 NW Bowdoin Pl #105
* Seattle, WA 98107-4960
* September 8, 1991
*/
#include <eel.h>
/* boolean type and its two values, used for every flag in this module */
#define BOOLEAN int
#define TRUE 1
#define FALSE 0
/* character buffer size; not referenced in the part of the file reviewed
 * here -- presumably the size of the buffer handed to FillBuffer(), confirm */
#define CBUFSIZE 4096
/* size in bytes of each token buffer (sbuf1 and sbuf2 of struct TokenStruct) */
#define MAX_TOKEN_LENGTH 4096
/* The following variables determine the behavior of the parser with respect
* to the token types which are output as a tag. Each flag is consulted by
* CSymbolWanted() for the symbol type of the same name. Note that
* CTagWantExtern is a modifier and is only effective when other options are
* used (ie. CTagWantProtoType must be set to obtain extern prototypes,
* CTagWantExtern alone yields nothing). Note also that the CTagWantExtern
* modifier has no effect for function, define and macro tags, which are
* tagged according only to the CTagWantFunction, CTagWantDefine and
* CTagWantMacro variables respectively (see COutputToken()).
*/
BOOLEAN CTagWantFunction = TRUE;
BOOLEAN CTagWantProtoType = FALSE;
BOOLEAN CTagWantStructure = TRUE;
BOOLEAN CTagWantTypeDefinition = TRUE;
BOOLEAN CTagWantMacro = TRUE;
BOOLEAN CTagWantEnumeration = TRUE;
BOOLEAN CTagWantUnion = TRUE;
BOOLEAN CTagWantGlobalVariable = TRUE;
BOOLEAN CTagWantClass = TRUE;
BOOLEAN CTagWantDefine = TRUE;
BOOLEAN CTagWantExtern = FALSE;
/* macro for determining if a character is whitespace; a plain table lookup,
 * so the table must have been filled by CParserInit() beforehand */
#define IsWhite(c) ( _C_white_boolean_table[c] )
/* the indexed table for white space character lookup (one entry per
 * character code, TRUE for members of C_white) */
BOOLEAN _C_white_boolean_table[256];
/* list of whitespace characters */
char C_white[] = " \f\t\n\r";
/* macro for determining if a character is a token delimiter */
#define IsDelim(c) ( _C_delim_boolean_table[c] )
/* the indexed table for token delimiter lookup (TRUE for members of
 * C_delim) */
BOOLEAN _C_delim_boolean_table[256];
/* list of token delimiters */
char C_delim[] = " \f\t\n\r\"[](){}#;:,.'=-+*/%&|^~!<>?";
/* macro for determining if a character is a punctuator */
#define IsPunctuator(c) ( _C_punctuator_boolean_table[c] )
/* the indexed table for punctuator character lookup (TRUE for members of
 * C_declaration_delim) */
BOOLEAN _C_punctuator_boolean_table[256];
/* list of punctuators, i.e. the characters that delimit declarations */
char C_declaration_delim[] = "[](){},;=";
char C_open_brace[] = "{[("; /* characters that open a nesting level */
char C_close_brace[] = ")]}"; /* characters that close a nesting level */
/*
* These defines are used to denote the type of the current tag. They are
* the values carried by a SymbolType and switched on by CSymbolWanted().
* NOP means "nothing special"; CTokenType() returns it for tokens it does
* not recognize.
*/
#define NOP 0
#define Function 1
#define ProtoType 2
#define Structure 3
#define TypeDefinition 4
#define Macro 5
#define Enumeration 6
#define Union 7
#define GlobalVariable 8
#define Class 9
#define Define 10
#define Extern 11
/* convenient definition: a SymbolType holds one of the codes above */
typedef int SymbolType;
/* the current file buffer state, passed alongside the Token to the parsing
 * routines */
typedef struct BufferStruct {
int token_line_location; /* running line counter, advanced by the lexer
                          * (CGetToken / CDiscardLine) on each newline */
char *inbuf; /* the buffer currently being parsed */
} Buffer;
/* the current input token state. Two storage groups (sbuf1/charloc1/
 * tokenline1 and sbuf2/charloc2/tokenline2) are addressed through the cur_
 * and prev_ pointers; CTokenSwap() exchanges those pointers so the token
 * just read becomes the previous one without copying. The code that first
 * aims the pointers at the two groups is not in the part of the file
 * reviewed here. */
typedef struct TokenStruct {
char sbuf1[MAX_TOKEN_LENGTH]; /* the first token buffer */
int charloc1; /* the char location of sbuf1 */
int tokenline1; /* the line number of sbuf1 */
char sbuf2[MAX_TOKEN_LENGTH]; /* the second token buffer */
int charloc2; /* the char location of sbuf2 */
int tokenline2; /* the line number of sbuf2 */
char *cur_token; /* pointer to the current token buffer */
int *cur_char_location; /* buffer position stored by CFillToken() once
                         * the current token has been read */
int *cur_token_line; /* the line of the current token */
char *prev_token; /* pointer to the previous token buffer */
int *prev_char_location; /* the location of previous token */
int *prev_token_line; /* the line of the previous token */
int token_count; /* number of tokens stepped over by the latest
                  * CToPunctuator() call */
int else_nesting_level; /* counter kept by CParsePreprocessorDirective()
                         * for #else/#elif/#endif; while nonzero,
                         * CToLevelZero() and CParseParens() do not count
                         * braces and parens */
BOOLEAN extern_active; /* minor state for this statement; when TRUE,
                        * COutputToken() applies the CTagWantExtern rule */
} Token;
/* storage reserved for each entry of C_token_list, terminator included */
#define SYMBOL_SIZE 20
/* a list of known C tokens and keywords, searched by CIsDeclarationToken().
 * Entry 0 is not a keyword: it holds every first character that occurs in
 * the entries below and is used for quick rejection. An empty string ends
 * the list. */
char C_token_list[][SYMBOL_SIZE] =
{
"*ivclsdfuaretp_hn", /* list of starting characters of symbols
* below */
"*", /* pointer */
"int", /* integer declaration */
"void", /* void type */
"char", /* character */
"long", /* long integer */
"short", /* short integer */
"double", /* double floating point */
"float", /* floating point */
"signed", /* signed integer */
"unsigned", /* unsigned integer */
"auto", /* auto variable (local duration) */
"register", /* register variable */
"static", /* static variable */
"struct", /* structure define */
"union", /* union define */
"enum", /* enum defined */
"typedef", /* type definition */
"const", /* constant variable */
"extern", /* external declaration */
"class", /* class declaration */
"friend", /* class modifier */
"private", /* class modifier */
"protected", /* class modifier */
"public", /* class modifier */
"volatile", /* volatile type qualifier */
"_based", /* pointer type */
"_cdecl", /* parameter calling sequence, C style */
"cdecl", /* parameter calling sequence, C style */
"_far", /* pointer type */
"far", /* pointer type */
"_huge", /* pointer type */
"huge", /* pointer type */
"_near", /* pointer type */
"near", /* pointer type */
"_pascal", /* parameter calling sequence, PASCAL style */
"pascal", /* parameter calling sequence, PASCAL style */
"_fortran", /* parameter calling sequence, FORTRAN style */
"_fastcall", /* parameter calling sequence, via registers */
"\0" /* empty entry: end of list marker */
};
/*----------------------------------------------------------------------------
 *
 * CParserInit() builds the three character class tables behind IsDelim(),
 * IsWhite() and IsPunctuator(). Every table has one slot per character
 * code; a slot is TRUE exactly when that character appears in the matching
 * list (C_delim, C_white, C_declaration_delim).
 *
 ---------------------------------------------------------------------------*/
CParserInit()
{
    int slot;
    char *member;

    /* start with every character excluded from every class */
    slot = 256;
    while (slot > 0) {
        slot--;
        _C_punctuator_boolean_table[slot] = FALSE;
        _C_white_boolean_table[slot] = FALSE;
        _C_delim_boolean_table[slot] = FALSE;
    }

    /* admit the whitespace characters */
    member = C_white;
    while (*member) {
        _C_white_boolean_table[*member] = TRUE;
        member++;
    }

    /* admit the token delimiters */
    member = C_delim;
    while (*member) {
        _C_delim_boolean_table[*member] = TRUE;
        member++;
    }

    /* admit the declaration punctuators */
    member = C_declaration_delim;
    while (*member) {
        _C_punctuator_boolean_table[*member] = TRUE;
        member++;
    }
}
/*----------------------------------------------------------------------------
 *
 * strchr() mirrors the standard string library function of the same name:
 * it returns a pointer to the first occurrence of c in s, or NULL when c
 * does not occur. The terminating null counts as part of the string, so
 * searching for '\0' yields a pointer to the terminator.
 *
 ---------------------------------------------------------------------------*/
char *strchr(s, c)
char *s;
char c;
{
    char *scan;

    /* walk forward until the wanted character is under the cursor; running
     * into the terminator first means there is no match */
    for (scan = s; *scan != c; scan++) {
        if (!*scan)
            return NULL;
    }
    return scan;
}
/*----------------------------------------------------------------------------
 *
 * FillBuffer() makes inbuf the current buffer and copies up to bufsize
 * characters, starting at point, into ctag_buffer, followed by a null
 * character '\0'. Point is advanced past the characters copied.
 *
 * Returns TRUE when characters were copied and FALSE when point was already
 * at the end of the buffer (ctag_buffer then holds an empty string).
 *
 * Note: when a full bufsize characters are available, the terminator is
 * written to ctag_buffer[bufsize], so ctag_buffer must have room for
 * bufsize + 1 characters; do not pass the full size of the array as
 * bufsize. This null terminated buffering scheme assumes the source has no
 * null character embedded within it.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN FillBuffer(inbuf, ctag_buffer, bufsize)
char *inbuf;
char *ctag_buffer;
int bufsize;
{
    char *old_buf;
    int stop;

    /* an empty result is the default */
    *ctag_buffer = '\0';

    /* switch to the input buffer; the previous buffer name is remembered
     * here but this routine does not switch back to it */
    old_buf = bufname;
    bufname = inbuf;

    /* nothing left to hand out */
    if (point == size())
        return FALSE;

    /* take bufsize characters, or whatever remains if that is less */
    stop = point + bufsize;
    if (stop > size())
        stop = size();
    grab(point, stop, ctag_buffer);

    /* terminate the copy, move point past it and report success */
    ctag_buffer[stop - point] = '\0';
    point = stop;
    return TRUE;
}
/*----------------------------------------------------------------------------
 *
 * CSymbolWanted() reports whether tags of the given symbol type are
 * enabled, by returning the CTagWant flag that belongs to the type. Types
 * with no flag of their own in this table (NOP, Extern and any unknown
 * value) are never wanted.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN CSymbolWanted(type)
SymbolType type;
{
    BOOLEAN wanted;

    /* anything not listed below is rejected */
    wanted = FALSE;

    switch (type) {
    case Function:
        wanted = CTagWantFunction;
        break;
    case ProtoType:
        wanted = CTagWantProtoType;
        break;
    case GlobalVariable:
        wanted = CTagWantGlobalVariable;
        break;
    case Define:
        wanted = CTagWantDefine;
        break;
    case Macro:
        wanted = CTagWantMacro;
        break;
    case Structure:
        wanted = CTagWantStructure;
        break;
    case TypeDefinition:
        wanted = CTagWantTypeDefinition;
        break;
    case Enumeration:
        wanted = CTagWantEnumeration;
        break;
    case Union:
        wanted = CTagWantUnion;
        break;
    case Class:
        wanted = CTagWantClass;
        break;
    default:
        break;
    }
    return wanted;
}
/*----------------------------------------------------------------------------
 *
 * CTokenType() classifies a single token. The tokens that need specialized
 * handling in the parser map to a symbol type code:
 *
 *      "#"        Define            "enum"     Enumeration
 *      "struct"   Structure         "union"    Union
 *      "typedef"  TypeDefinition    "class"    Class
 *
 * Every other token yields NOP. One token can only reveal so much; the
 * parser has to give the result its contextual meaning.
 *
 ---------------------------------------------------------------------------*/
SymbolType CTokenType(token)
char *token;
{
    /* the first character selects the one keyword worth comparing against */
    switch (token[0]) {
    case '#':
        /* macro and non macro defines */
        if (!strcmp(token, "#"))
            return Define;
        break;
    case 's':
        /* structure declarations */
        if (!strcmp(token, "struct"))
            return Structure;
        break;
    case 't':
        /* type declaration */
        if (!strcmp(token, "typedef"))
            return TypeDefinition;
        break;
    case 'e':
        /* enumeration declaration */
        if (!strcmp(token, "enum"))
            return Enumeration;
        break;
    case 'u':
        /* union declaration */
        if (!strcmp(token, "union"))
            return Union;
        break;
    case 'c':
        /* class declaration */
        if (!strcmp(token, "class"))
            return Class;
        break;
    default:
        break;
    }

    /* nothing special about this token */
    return NOP;
}
/*----------------------------------------------------------------------------
 *
 * CIsDeclarationToken() returns TRUE when the token is one of the
 * declaration keywords held in C_token_list and FALSE otherwise. Names the
 * user introduces with typedef are not in that list, so they are reported
 * as FALSE even though they act as declaration keywords; callers have to
 * live with that for such code.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN CIsDeclarationToken(token)
char *token;
{
    int index;

    /* entry 0 lists every possible first character, so one lookup rules
     * out most tokens before any string comparison is made */
    if (strchr(C_token_list[0], token[0])) {
        /* compare against each keyword up to the empty end marker */
        index = 1;
        while (*C_token_list[index]) {
            if (!strcmp(token, C_token_list[index]))
                return TRUE;
            index++;
        }
    }

    /* no keyword matched */
    return FALSE;
}
/*----------------------------------------------------------------------------
 *
 * COutputToken() hands the previous token to add_tag() as a tag of the
 * given type, provided that type is enabled (see CSymbolWanted()). While
 * extern_active is set and CTagWantExtern is FALSE, only Function, Define
 * and Macro tags get through. The position given to add_tag() is the
 * previous token's recorded character location less the token's length.
 * The token_buffer and outbuf parameters are not used here.
 *
 ---------------------------------------------------------------------------*/
COutputToken(token, token_buffer, token_type, infname, outbuf)
Token *token;
Buffer *token_buffer;
SymbolType token_type;
char *infname;
char *outbuf;
{
    /* this kind of tag has been turned off */
    if (!CSymbolWanted(token_type))
        return;

    /* extern declarations are held back unless asked for; functions,
     * defines and macros are exempt from that rule */
    if (token->extern_active && !CTagWantExtern &&
        token_type != Function &&
        token_type != Define &&
        token_type != Macro)
        return;

    add_tag(token->prev_token, infname,
            *(token->prev_char_location) -
            strlen(token->prev_token));
}
/*----------------------------------------------------------------------------
 *
 * CGetToken() reads the next token from the current buffer, starting at
 * point, and stores it null terminated in token. Leading whitespace and
 * comments (both block and C++ style) are skipped; each newline passed in
 * whitespace or inside a comment increments *line_number. A double quoted
 * string or a single quoted character constant, backslash escapes included,
 * is returned as one token. A delimiter character (see IsDelim()) is
 * returned as a token of its own and also ends any token in progress. On
 * success point is left just past the token.
 *
 * Returns TRUE when a token was stored and FALSE when the end of the buffer
 * was reached without one (for example inside an unterminated comment or
 * string). A token that is complete when the buffer ends is returned, so
 * the last token of a buffer with no trailing character is not lost.
 *
 * The inbuf parameter is not used; characters are always read from the
 * current buffer. No check is made that the token fits the caller's buffer.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN CGetToken(inbuf, token, line_number)
char *inbuf;
char *token;
int *line_number;
{
    /* the states of the lexical parser */
#define Parse 0
#define BeginCommentMaybe 1
#define InComment 2
#define InCommentEndMaybe 3
#define InCPPComment 4
#define InQuoteNormal 5
#define InQuoteLiteral 6
#define InSingleQuoteNormal 7
#define InSingleQuoteLiteral 8
#define WhiteSpace 9
#define Exit 10
    typedef int State;
    State current_state;    /* the current state of the parser */
    char c;                 /* the current character being examined */
    char *t;                /* where the next token character goes */

    /* init */
    current_state = WhiteSpace;
    t = token;
    *t = '\0';

    /* parse the buffer for the next token */
    while (TRUE) {
        if (point == size()) {
            /* The buffer is used up. A finished token, or an ordinary
             * token that was still being collected, is delivered as is.
             * No character was read ahead in this pass, so point is not
             * backed up. */
            if (current_state == Exit ||
                (current_state == Parse && t != token)) {
                *t = '\0';
                return TRUE;
            }
            return FALSE;
        }
        c = curchar();
        point++;

        /* react on the state machine */
        switch (current_state) {
        case Parse:
            switch (c) {
            case '/':
                if (t != token) {
                    /* a token is pending, leave the '/' for next time */
                    point--;
                    current_state = Exit;
                }
                else {
                    /* either the start of a comment or a division sign.
                     * The '/' is stored without advancing t; it is kept
                     * only if no comment follows. */
                    current_state = BeginCommentMaybe;
                    *t = c;
                }
                break;
            case '\"':
                if (t != token) {
                    /* a token is pending, leave the quote for next time */
                    point--;
                    current_state = Exit;
                }
                else {
                    current_state = InQuoteNormal;
                    *t++ = c;
                }
                break;
            case '\'':
                if (t != token) {
                    /* a token is pending, leave the quote for next time */
                    point--;
                    current_state = Exit;
                }
                else {
                    current_state = InSingleQuoteNormal;
                    *t++ = c;
                }
                break;
            default:
                if (IsDelim(c)) {
                    /* a delimiter ends the pending token, or is a one
                     * character token itself when nothing is pending */
                    if (t != token) {
                        point--;
                    }
                    else {
                        *t++ = c;
                    }
                    current_state = Exit;
                }
                else {
                    /* normal character, store it in the token */
                    *t++ = c;
                }
                break;
            }
            break;
        case WhiteSpace:
            if (!IsWhite(c)) {
                /* first character of something else, look at it again
                 * in the Parse state */
                current_state = Parse;
                point--;
            }
            else {
                /* check for newline */
                if (c == '\n') {
                    (*line_number)++;
                }
            }
            break;
        case BeginCommentMaybe:
            switch (c) {
            case '/':
                current_state = InCPPComment;
                break;
            case '*':
                current_state = InComment;
                break;
            default:
                /* it was a plain '/': keep it as the token and give the
                 * character just read back */
                t++;
                point--;
                current_state = Exit;
                break;
            }
            break;
        case InComment:
            switch (c) {
            case '*':
                /* this is potentially the end of the comment */
                current_state = InCommentEndMaybe;
                break;
            default:
                /* just keep on going and check for newline */
                if (c == '\n') {
                    (*line_number)++;
                }
                break;
            }
            break;
        case InCommentEndMaybe:
            switch (c) {
            case '/':
                /* this is indeed the end of the comment */
                current_state = WhiteSpace;
                break;
            case '*':
                /* this is also perhaps the end of comment */
                break;
            case '\n':
                /* count the line, then carry on inside the comment */
                (*line_number)++;
                /* fall through */
            default:
                /* still part of the current comment */
                current_state = InComment;
                break;
            }
            break;
        case InCPPComment:
            /* a C++ style comment runs to the end of the line */
            if (c == '\n') {
                current_state = WhiteSpace;
                (*line_number)++;
            }
            break;
        case InQuoteNormal:
            switch (c) {
            case '\"':
                /* closing quote, the string is complete */
                current_state = Exit;
                break;
            case '\\':
                /* the next character is escaped */
                current_state = InQuoteLiteral;
                break;
            default:
                /* normal dull behavior */
                break;
            }
            *t++ = c;
            break;
        case InQuoteLiteral:
            /* the escaped character is simply copied */
            current_state = InQuoteNormal;
            *t++ = c;
            break;
        case InSingleQuoteNormal:
            switch (c) {
            case '\'':
                /* closing quote, the constant is complete */
                current_state = Exit;
                break;
            case '\\':
                /* the next character is escaped */
                current_state = InSingleQuoteLiteral;
                break;
            default:
                /* ordinary character of the constant */
                break;
            }
            *t++ = c;
            break;
        case InSingleQuoteLiteral:
            /* the escaped character is simply copied */
            current_state = InSingleQuoteNormal;
            *t++ = c;
            break;
        case Exit:
            /* the token is complete; give back the character that this
             * pass of the loop read ahead */
            *t = '\0';
            point--;
            return TRUE;
            break;
        default:            /* not reached */
            break;
        }
    }
}
/*----------------------------------------------------------------------------
*
* CFillToken() will obtain the next lexical parser from the buffer and move
* the token into the Token structure. TRUE is returned if the lexical
* parser returns TRUE, otherwise FALSE is returned.
*
---------------------------------------------------------------------------*/
BOOLEAN CFillToken(token, token_buffer)
Token *token;
Buffer *token_buffer;
{
BOOLEAN token_found;
/* obtain the next token */
token_found = CGetToken(token_buffer->inbuf,
token->cur_token,
&(token_buffer->token_line_location));
/* if one is around then update the state for that token */
if (token_found) {
/* update location variables */
*(token->cur_char_location) = point;
*(token->cur_token_line) = token_buffer->token_line_location;
}
return token_found;
}
/*----------------------------------------------------------------------------
 *
 * CTokenSwap() exchanges the roles of the current and previous token: the
 * cur_ and prev_ pointers for the token text, the character location and
 * the line are swapped pairwise. Only pointers move, no token text is
 * copied.
 *
 ---------------------------------------------------------------------------*/
CTokenSwap(token)
Token *token;
{
    char *held_text;        /* keeps one text pointer during the exchange */
    int *held_number;       /* keeps one location or line pointer */

    /* exchange the line pointers */
    held_number = token->prev_token_line;
    token->prev_token_line = token->cur_token_line;
    token->cur_token_line = held_number;

    /* exchange the character location pointers */
    held_number = token->prev_char_location;
    token->prev_char_location = token->cur_char_location;
    token->cur_char_location = held_number;

    /* exchange the token text pointers */
    held_text = token->prev_token;
    token->prev_token = token->cur_token;
    token->cur_token = held_text;
}
/*----------------------------------------------------------------------------
 *
 * CDiscardLine() moves point past the rest of the current line, continuing
 * onto following lines for as long as the newline is immediately preceded
 * by a line continuation character '\'. *line_number is incremented once
 * for every newline passed. The scan also stops at the end of the buffer.
 *
 * Returns TRUE if the character at point on entry is '(' and FALSE
 * otherwise (including when point is already at the end of the buffer).
 * This is useful for determining if a #define is a macro or a simple
 * define. The inbuf parameter is not used; the current buffer is read.
 *
 * Three defects of the earlier version are corrected here: the '(' test was
 * made before any character had been read and so could never succeed; a
 * backslash anywhere on the line, not only directly before the newline,
 * caused the following line to be discarded as well; and the scan did not
 * stop at the end of the buffer when the last line had no newline.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN CDiscardLine(inbuf, line_number)
char *inbuf;
int *line_number;
{
    char c;                     /* the current character being examined */
    BOOLEAN after_backslash;    /* TRUE if the previous character was '\' */
    BOOLEAN is_macro;           /* TRUE if the first character is '(' */

    /* init */
    is_macro = FALSE;
    after_backslash = FALSE;

    /* if the end of buffer is reached then return */
    if (point == size())
        return is_macro;

    /* look at the first character without consuming it */
    if (curchar() == '(')
        is_macro = TRUE;

    /* consume characters up to and including the first newline that is
     * not a continued one */
    while (point != size()) {
        c = curchar();
        point++;
        if (c == '\n') {
            (*line_number)++;
            if (!after_backslash)
                break;
            /* continued line, carry on with the next one */
            after_backslash = FALSE;
        }
        else {
            /* only a backslash directly before the newline continues
             * the line, so the flag follows the latest character */
            if (c == '\\')
                after_backslash = TRUE;
            else
                after_backslash = FALSE;
        }
    }
    return is_macro;
}
/*----------------------------------------------------------------------------
 *
 * CParseDefine() handles the remainder of a #define directive. The next
 * token is taken as the defined name and made the previous token, the rest
 * of the directive line is discarded, and the name is output as a Macro
 * when CDiscardLine() reports TRUE and as a Define otherwise. Nothing
 * happens if no token is available.
 *
 ---------------------------------------------------------------------------*/
CParseDefine(token, token_buffer, infname, outbuf)
Token *token;
Buffer *token_buffer;
char *infname;
char *outbuf;
{
    SymbolType kind;        /* Define unless the line turns out a macro */

    /* the name being defined */
    if (!CFillToken(token, token_buffer))
        return;

    /* COutputToken() reports the previous token, so move the name there */
    CTokenSwap(token);

    /* drop the rest of the line; the result tells macro from define */
    kind = Define;
    if (CDiscardLine(token_buffer->inbuf,
                     &(token_buffer->token_line_location)))
        kind = Macro;

    /* output the token */
    COutputToken(token, token_buffer, kind, infname, outbuf);
}
/*----------------------------------------------------------------------------
 *
 * CParsePreprocessorDirective() is called after a '#' token. It reads the
 * directive name and acts on it:
 *
 *      define          passed on to CParseDefine(), which also consumes
 *                      the rest of the line
 *      else, elif      else_nesting_level is incremented
 *      endif           else_nesting_level is decremented unless already 0
 *      anything else   no state change
 *
 * For every directive but define the rest of the line, continuation lines
 * included, is then discarded. Nothing happens if no token is available.
 *
 * NOTE(review): else_nesting_level is one flat counter, so an #if block
 * holding both #elif and #else adds two but its #endif removes only one.
 *
 ---------------------------------------------------------------------------*/
CParsePreprocessorDirective(token, token_buffer, infname, outbuf)
Token *token;
Buffer *token_buffer;
char *infname;
char *outbuf;
{
    char *directive;        /* the directive name just read */

    if (!CFillToken(token, token_buffer))
        return;
    directive = token->cur_token;

    /* a define is parsed, line and all, by its own routine */
    if (!strcmp(directive, "define")) {
        CParseDefine(token, token_buffer, infname, outbuf);
        return;
    }

    if (!strcmp(directive, "else") || !strcmp(directive, "elif")) {
        /* entering an alternative branch of a conditional block */
        token->else_nesting_level++;
    }
    else if (!strcmp(directive, "endif")) {
        /* leaving a conditional block, never going below zero */
        if (token->else_nesting_level)
            token->else_nesting_level--;
    }

    /* remove the rest of the directive line including line continuation
     * characters */
    CDiscardLine(token_buffer->inbuf,
                 &(token_buffer->token_line_location));
}
/*----------------------------------------------------------------------------
 *
 * CNextToken() obtains the next token meant for the semantic parser.
 * Preprocessor directives never reach the caller: whenever the token read
 * starts with '#', the directive is handled by
 * CParsePreprocessorDirective() and another token is read. Returns TRUE
 * with the token in place, or FALSE when no more tokens are available.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN CNextToken(token, token_buffer, infname, outbuf)
Token *token;
Buffer *token_buffer;
char *infname;
char *outbuf;
{
    BOOLEAN token_found;

    while (TRUE) {
        /* obtain the next token */
        token_found = CFillToken(token, token_buffer);

        /* stop when out of tokens or when holding an ordinary token */
        if (!token_found || token->cur_token[0] != '#')
            break;

        /* a directive: deal with it and go around for another token */
        CParsePreprocessorDirective(token, token_buffer, infname, outbuf);
    }
    return token_found;
}
/*----------------------------------------------------------------------------
 *
 * CToLevelZero() is called just after an opening brace, bracket or paren.
 * Starting at nesting level one, it reads tokens until the level returns
 * to zero or no tokens remain: a token starting with a character of
 * C_open_brace raises the level, one starting with a character of
 * C_close_brace lowers it. Preprocessor directives met on the way are
 * parsed, and while else_nesting_level (reset to zero on entry) is nonzero
 * the braces are not counted, so that only one alternative of a conditional
 * block contributes. Tokens are read with CGetToken() directly, so the
 * current token's recorded location and line are not updated.
 *
 ---------------------------------------------------------------------------*/
CToLevelZero(token, token_buffer, infname, outbuf)
Token *token;
Buffer *token_buffer;
char *infname;
char *outbuf;
{
    int depth;              /* how many levels are still open */
    char lead;              /* first character of the token just read */

    token->else_nesting_level = 0;
    depth = 1;
    while (depth) {
        /* out of tokens, give up */
        if (!CGetToken(token_buffer->inbuf, token->cur_token,
                       &(token_buffer->token_line_location)))
            break;

        lead = token->cur_token[0];
        if (lead == '#') {
            CParsePreprocessorDirective(token, token_buffer,
                                        infname, outbuf);
        }
        else if (!token->else_nesting_level) {
            /* only count open brace, parens and brackets within blocks
             * of one element of an ifdef code block */
            if (strchr(C_open_brace, lead))
                depth++;
            else if (strchr(C_close_brace, lead))
                depth--;
        }
    }
}
/*----------------------------------------------------------------------------
 *
 * CToPunctuator() reads tokens until one starts with a punctuator (see
 * IsPunctuator()). Before each read the current token is made the previous
 * one, so on success prev_token is the token just ahead of the punctuator.
 * token_count is reset on entry and counts the tokens stepped over.
 * Returns TRUE when a punctuator was reached and FALSE when the tokens ran
 * out first.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN CToPunctuator(token, token_buffer, infname, outbuf)
Token *token;
Buffer *token_buffer;
char *infname;
char *outbuf;
{
    token->token_count = 0;
    do {
        token->token_count++;

        /* keep what we have as the previous token, then fetch a new one */
        CTokenSwap(token);
        if (!CNextToken(token, token_buffer, infname, outbuf))
            return FALSE;
    } while (!IsPunctuator(token->cur_token[0]));

    /* the current token is a punctuator */
    return TRUE;
}
/*----------------------------------------------------------------------------
 *
 * CParseParens() is called just after a '(' and moves through the
 * parenthesized declaration until the matching ')' or until the tokens run
 * out. Nested parens are tracked, array bounds opened by '[' are skipped
 * with CToLevelZero(), and every other token is made the previous token so
 * that the last such token is left in prev_token. Tokens read while
 * else_nesting_level (reset to zero on entry) is nonzero are ignored.
 * Returns TRUE if a '[' was seen within the parens and FALSE otherwise.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN CParseParens(token, token_buffer, infname, outbuf)
Token *token;
Buffer *token_buffer;
char *infname;
char *outbuf;
{
    int open_parens;        /* parens still waiting for their ')' */
    BOOLEAN saw_array;      /* TRUE once a '[' has been met */
    char lead;              /* first character of the token just read */

    token->else_nesting_level = 0;
    saw_array = FALSE;
    open_parens = 1;

    while (open_parens) {
        /* out of tokens, give up */
        if (!CNextToken(token, token_buffer, infname, outbuf))
            break;

        if (!token->else_nesting_level) {
            lead = token->cur_token[0];
            if (lead == '(') {
                open_parens++;
            }
            else if (lead == ')') {
                /* the count is always positive inside this loop */
                open_parens--;
            }
            else if (lead == '[') {
                /* move to end of array bounds */
                saw_array = TRUE;
                CToLevelZero(token, token_buffer, infname, outbuf);
            }
            else {
                /* an ordinary token, remember it as the previous one */
                CTokenSwap(token);
            }
        }
    }
    return saw_array;
}
/*----------------------------------------------------------------------------
 *
 * COutputCommaDelimitedToken() outputs the previous token as a tag of the
 * given type (subject to the checks in COutputToken()) and then moves
 * ahead until the current token starts with ',' or ';' or the tokens run
 * out. Brace, bracket and paren groups met on the way are skipped whole.
 *
 ---------------------------------------------------------------------------*/
COutputCommaDelimitedToken(token, token_buffer, token_type, infname, outbuf)
Token *token;
Buffer *token_buffer;
SymbolType token_type;
char *infname;
char *outbuf;
{
    /* output the token */
    COutputToken(token, token_buffer, token_type, infname, outbuf);

    /* go to the next list punctuator (',' or ';') */
    while (token->cur_token[0] != ',' &&
           token->cur_token[0] != ';') {
        /* an opening brace, bracket or paren: pass over the whole group */
        if (strchr(C_open_brace, token->cur_token[0]))
            CToLevelZero(token, token_buffer, infname, outbuf);

        /* stop as soon as the tokens run out */
        if (!CToPunctuator(token, token_buffer, infname, outbuf))
            break;
    }
}
/*----------------------------------------------------------------------------
 *
 * CParseCommaDelimitedList() parses a list of declarators separated by
 * commas until the current token starts with ';' or the tokens run out.
 * Each time a punctuator is reached that properly ends a declarator ('(',
 * '[', ',', ';' or '='), the previous token is output as a tag of the
 * given type (subject to the checks in COutputToken()) and the rest of that
 * declarator is skipped up to the next ',' or ';'. For '(' the name is
 * first picked out of the parens by CParseParens().
 *
 ---------------------------------------------------------------------------*/
CParseCommaDelimitedList(token, token_buffer, token_type, infname, outbuf)
Token *token;
Buffer *token_buffer;
SymbolType token_type;
char *infname;
char *outbuf;
{
    BOOLEAN more;           /* FALSE once the tokens have run out */
    char lead;              /* the punctuator that was reached */

    more = TRUE;
    while (more && token->cur_token[0] != ';') {
        more = CToPunctuator(token, token_buffer, infname, outbuf);
        if (!more)
            break;

        /* remember the punctuator; CParseParens() replaces the current
         * token but the decision below rests on what was found here */
        lead = token->cur_token[0];

        /* an embedded declaration, either a complex variable pointer or a
         * function pointer: pick out the internal token first */
        if (lead == '(')
            CParseParens(token, token_buffer, infname, outbuf);

        if (lead == '(' || lead == '[' || lead == ',' ||
            lead == ';' || lead == '=') {
            /* a proper end of a declarator: output the name and move on
             * to the next list punctuator */
            COutputToken(token, token_buffer, token_type,
                         infname, outbuf);
            while (more &&
                   token->cur_token[0] != ',' &&
                   token->cur_token[0] != ';') {
                if (strchr(C_open_brace, token->cur_token[0]))
                    CToLevelZero(token, token_buffer, infname, outbuf);
                more = CToPunctuator(token, token_buffer,
                                     infname, outbuf);
            }
        }
    }
}
/*----------------------------------------------------------------------------
 *
 * CParseFunctionOrGlobalVariable() parses a function, prototype or global
 * variable syntax and tags whichever one it turns out to be.
 *
 * The token in front of the parenthesized group is remembered whenever
 * CIsDeclarationToken() does not recognize it: it may be the function name
 * (or a symbol defined through a typedef, which cannot be told apart
 * here).  CParseParens() then consumes the group and the next token
 * decides:
 *
 *   ';'            a global variable when the remembered token was a known
 *                  declaration token, otherwise a prototype
 *   '('            a variable declaration when CParseParens() reported a
 *                  variable; otherwise the token following the second
 *                  group selects function pointer variable ('='),
 *                  prototype (';') or function (anything else)
 *   '[' '=' ','    global variable declaration(s)
 *   anything else  a function definition
 *
 * Strange variable declarations may fool this, but not likely.
 *
 ---------------------------------------------------------------------------*/
CParseFunctionOrGlobalVariable(token, token_buffer, infname, outbuf)
Token *token;
Buffer *token_buffer;
char *infname;
char *outbuf;
{
    char *saved_name;           /* copy of the token before the parens */
    int saved_loc;              /* char location belonging to saved_name */
    int saved_line;             /* line number belonging to saved_name */
    int lead;                   /* first char of the token being tested */
    BOOLEAN have_token;
    BOOLEAN scanning;
    BOOLEAN prev_is_known;
    BOOLEAN saw_variable;

    /* start with an empty saved token */
    saved_name = malloc(MAX_TOKEN_LENGTH);
    saved_name[0] = '\0';
    saved_loc = 0;
    saved_line = 1;

    /* an unknown previous token may be the function name, keep a copy
     * because the parsing below overwrites the token buffers */
    prev_is_known = CIsDeclarationToken(token->prev_token);
    if (!prev_is_known) {
        strcpy(saved_name, token->prev_token);
        saved_loc = *(token->prev_char_location);
        saved_line = *(token->prev_token_line);
    }

    /* run through the parenthesized group, back to level zero */
    saw_variable = CParseParens(token, token_buffer, infname, outbuf);

    /* the token after the group tells us what we are looking at */
    have_token = CNextToken(token, token_buffer, infname, outbuf);
    if (have_token) {
        lead = token->cur_token[0];

        if (lead == ';') {
            /* prototype or variable declaration.  This is correct for the
             * large majority of cases since most do not enclose simple
             * variable declarations in parens. */
            if (prev_is_known) {
                COutputToken(token, token_buffer, GlobalVariable,
                             infname, outbuf);
            }
            else {
                /* a prototype: put the saved token back in prev_token
                 * before it is output */
                strcpy(token->prev_token, saved_name);
                *(token->prev_char_location) = saved_loc;
                *(token->prev_token_line) = saved_line;
                COutputToken(token, token_buffer, ProtoType,
                             infname, outbuf);
            }
        }
        else if (lead == '(') {
            if (saw_variable) {
                /* a variable declaration */
                COutputCommaDelimitedToken(token, token_buffer,
                                           GlobalVariable,
                                           infname, outbuf);
                CParseCommaDelimitedList(token, token_buffer,
                                         GlobalVariable,
                                         infname, outbuf);
            }
            else {
                /* step over the second group and look at what follows */
                CToLevelZero(token, token_buffer, infname, outbuf);
                have_token = CNextToken(token, token_buffer,
                                        infname, outbuf);
                if (have_token) {
                    lead = token->cur_token[0];

                    if (lead == '=') {
                        /* function pointer variable declaration */
                        COutputCommaDelimitedToken(token, token_buffer,
                                                   GlobalVariable,
                                                   infname, outbuf);
                        CParseCommaDelimitedList(token, token_buffer,
                                                 GlobalVariable,
                                                 infname, outbuf);
                    }
                    else if (lead == ';') {
                        /* prototype */
                        COutputToken(token, token_buffer,
                                     ProtoType, infname, outbuf);
                    }
                    else {
                        /* function definition */
                        COutputToken(token, token_buffer,
                                     Function, infname, outbuf);

                        /* find the opening '{' and skip the body */
                        scanning = TRUE;
                        while (scanning && token->cur_token[0] != '{') {
                            scanning = CToPunctuator(token, token_buffer,
                                                     infname, outbuf);
                        }
                        if (scanning) {
                            CToLevelZero(token, token_buffer,
                                         infname, outbuf);
                        }
                    }
                }
            }
        }
        else if (lead == '[' || lead == '=' || lead == ',') {
            /* global variables */
            COutputCommaDelimitedToken(token, token_buffer,
                                       GlobalVariable,
                                       infname, outbuf);
            CParseCommaDelimitedList(token, token_buffer,
                                     GlobalVariable,
                                     infname, outbuf);
        }
        else {
            /* a function: put the saved token back in prev_token before
             * it is output */
            strcpy(token->prev_token, saved_name);
            *(token->prev_char_location) = saved_loc;
            *(token->prev_token_line) = saved_line;
            COutputToken(token, token_buffer,
                         Function, infname, outbuf);

            /* find the opening '{' and skip the body */
            scanning = TRUE;
            while (scanning && token->cur_token[0] != '{') {
                scanning = CToPunctuator(token, token_buffer,
                                         infname, outbuf);
            }
            if (scanning) {
                CToLevelZero(token, token_buffer, infname, outbuf);
            }
        }
    }

    free(saved_name);
}
/*----------------------------------------------------------------------------
 *
 * CParseNOP() handles a token that is not yet recognized.  Only a
 * punctuator is of interest at this point:
 *
 *   ';' '=' ',' '['   a global variable declaration
 *   '{'               a structure declaration (C++ 2.0 syntax), possibly
 *                     followed by a list of variables
 *   '('               function, prototype or global variable, left to
 *                     CParseFunctionOrGlobalVariable()
 *
 * Each of these clears token->extern_active once it has been handled.
 * Anything else is a true no-op.  No closing punctuator should turn up
 * here, otherwise the syntax is in a bad way.
 *
 ---------------------------------------------------------------------------*/
CParseNOP(token, token_buffer, infname, outbuf)
Token *token;
Buffer *token_buffer;
char *infname;
char *outbuf;
{
    BOOLEAN have_token;
    int lead;                   /* first character of the current token */

    lead = token->cur_token[0];

    if (lead == ';' || lead == '=' || lead == ',' || lead == '[') {
        /* global variables are here */
        COutputCommaDelimitedToken(token, token_buffer,
                                   GlobalVariable,
                                   infname, outbuf);
        CParseCommaDelimitedList(token, token_buffer,
                                 GlobalVariable,
                                 infname, outbuf);
        token->extern_active = FALSE;
    }
    else if (lead == '{') {
        /* a structure (C++ syntax): output it and move through the
         * declaration */
        COutputToken(token, token_buffer, Structure, infname, outbuf);
        CToLevelZero(token, token_buffer, infname, outbuf);

        /* whatever follows the closing brace is a variable list */
        have_token = CNextToken(token, token_buffer, infname, outbuf);
        if (have_token) {
            CParseCommaDelimitedList(token, token_buffer,
                                     GlobalVariable,
                                     infname, outbuf);
        }
        token->extern_active = FALSE;
    }
    else if (lead == '(') {
        CParseFunctionOrGlobalVariable(token, token_buffer,
                                       infname, outbuf);
        token->extern_active = FALSE;
    }
    /* else: true NOP, the extern state is left untouched */
}
/*----------------------------------------------------------------------------
 *
 * CParseDeclarationStatement() parses struct, enum and union statements.
 * It moves to the first punctuator and then acts on it:
 *
 *   '{'               an object declaration.  The token before the brace
 *                     is output with the passed type unless
 *                     token->token_count is 1; the braces are skipped and
 *                     anything that follows is parsed as a list of global
 *                     variables
 *   ';' '=' ',' '['   a global variable declaration
 *   '('               left to CParseFunctionOrGlobalVariable()
 *
 * Nothing is done when CToPunctuator() finds no punctuator.
 *
 ---------------------------------------------------------------------------*/
CParseDeclarationStatement(token, token_buffer, type, infname, outbuf)
Token *token;
Buffer *token_buffer;
SymbolType type;
char *infname;
char *outbuf;
{
    BOOLEAN have_token;
    BOOLEAN have_punctuator;
    int lead;                   /* first character of the punctuator */

    have_punctuator = CToPunctuator(token, token_buffer, infname, outbuf);
    if (have_punctuator) {
        lead = token->cur_token[0];

        if (lead == '{') {
            /* this is truly an object declaration, output only if this
             * is not a variable declaration */
            if (token->token_count != 1) {
                COutputToken(token, token_buffer, type, infname, outbuf);
            }

            /* move through the declaration */
            CToLevelZero(token, token_buffer, infname, outbuf);

            /* variables may be declared after the closing brace; the
             * first of them has not been reached yet, so the list parser
             * alone takes care of them */
            have_token = CNextToken(token, token_buffer, infname, outbuf);
            if (have_token) {
                CParseCommaDelimitedList(token, token_buffer,
                                         GlobalVariable,
                                         infname, outbuf);
            }
        }
        else if (lead == ';' || lead == '=' ||
                 lead == ',' || lead == '[') {
            /* the first variable sits right in front of the punctuator,
             * output it and then parse the rest of the list */
            COutputCommaDelimitedToken(token, token_buffer,
                                       GlobalVariable,
                                       infname, outbuf);
            CParseCommaDelimitedList(token, token_buffer,
                                     GlobalVariable,
                                     infname, outbuf);
        }
        else if (lead == '(') {
            CParseFunctionOrGlobalVariable(token, token_buffer,
                                           infname, outbuf);
        }
        /* no other punctuator is expected here */
    }
}
/*----------------------------------------------------------------------------
 *
 * CParseTypeDefinition() parses the typedef statement.  Tokens are read
 * until the first *correct* punctuator, the ';', ',' or the '[', is
 * reached; the token just before it is the typedef name.  On the way:
 *
 *   - a '{' following a named struct, enum or union tags that name with
 *     the type of the first token after the typedef keyword, then the
 *     braces are skipped
 *   - a second parenthesized group directly after a first one marks a
 *     function typedef; the token picked out of the first group is output
 *     as the typedef name
 *
 * Any further names in the statement are output through
 * CParseCommaDelimitedList().
 *
 ---------------------------------------------------------------------------*/
CParseTypeDefinition(token, token_buffer, infname, outbuf)
Token *token;
Buffer *token_buffer;
char *infname;
char *outbuf;
{
    BOOLEAN have_token;
    BOOLEAN after_parens;       /* previous token was a paren group */
    BOOLEAN name_output;        /* function typedef name already output */
    int tokens_seen;
    int lead;                   /* first character of the current token */
    SymbolType first_type;

    have_token = CNextToken(token, token_buffer, infname, outbuf);
    if (have_token) {

        /* remember what kind of token opens the definition */
        first_type = CTokenType(token->cur_token);

        after_parens = FALSE;
        name_output = FALSE;
        tokens_seen = 0;

        /* parse the typedef */
        while (have_token &&
               !name_output &&
               token->cur_token[0] != ';' &&
               token->cur_token[0] != ',' &&
               token->cur_token[0] != '[') {

            lead = token->cur_token[0];

            if (lead == '{') {
                /* if the token count is > 1 here then we have a named
                 * declaration; output the name, but only for enum,
                 * struct or union */
                if (tokens_seen > 1 &&
                    (first_type == Structure ||
                     first_type == Enumeration ||
                     first_type == Union)) {
                    COutputToken(token, token_buffer,
                                 first_type, infname, outbuf);
                }

                /* go back to level 0 */
                CToLevelZero(token, token_buffer, infname, outbuf);
            }
            else if (lead == '(') {
                if (after_parens) {
                    /* second group in a row: a function typedef, so
                     * output the previous token and skip the group */
                    COutputToken(token, token_buffer,
                                 TypeDefinition, infname, outbuf);
                    CToLevelZero(token, token_buffer, infname, outbuf);
                    name_output = TRUE;
                }
                else {
                    /* first group: move back to the top level and note
                     * that another paren would identify a token */
                    CParseParens(token, token_buffer, infname, outbuf);
                    after_parens = TRUE;

                    /* swap to prevent loss of the token, this undoes the
                     * swap performed at the bottom of the loop */
                    CTokenSwap(token);
                }
            }
            else {
                /* another token after a paren parse means the token in
                 * the parens was nothing special */
                after_parens = FALSE;
            }

            /* get another token */
            CTokenSwap(token);
            have_token = CNextToken(token, token_buffer, infname, outbuf);
            tokens_seen++;
        }

        /* output the typedef names if appropriate */
        if (token->prev_token[0] != '}' && have_token) {

            /* don't output the first token if already done */
            if (!name_output) {
                COutputCommaDelimitedToken(token, token_buffer,
                                           TypeDefinition,
                                           infname, outbuf);
            }

            /* parse through the rest of the typedef names */
            CParseCommaDelimitedList(token, token_buffer,
                                     TypeDefinition,
                                     infname, outbuf);
        }
    }
}
/*----------------------------------------------------------------------------
 *
 * CParseClass() parses the C++ class syntax.  The token just before the
 * first '{', ':' or ';' is taken as the class name and output as a Class
 * symbol; the remainder of the statement is then read up to the closing
 * ';', running through the top level braces if there are any.
 *
 ---------------------------------------------------------------------------*/
CParseClass(token, token_buffer, infname, outbuf)
Token *token;
Buffer *token_buffer;
char *infname;
char *outbuf;
{
    BOOLEAN have_token;

    /* read forward to the punctuator that ends the class name; the swap
     * saves the current token so that it is the previous token when the
     * punctuator is reached */
    for (have_token = TRUE;
         have_token &&
         token->cur_token[0] != '{' &&
         token->cur_token[0] != ':' &&
         token->cur_token[0] != ';';
         have_token = CNextToken(token, token_buffer, infname, outbuf)) {
        CTokenSwap(token);
    }

    if (have_token) {
        /* output the class name */
        COutputToken(token, token_buffer, Class, infname, outbuf);

        /* parse through the remainder of the statement */
        for (;
             have_token && token->cur_token[0] != ';';
             have_token = CNextToken(token, token_buffer,
                                     infname, outbuf)) {
            if (token->cur_token[0] == '{') {
                /* move back to the zero level */
                CToLevelZero(token, token_buffer, infname, outbuf);
            }
        }
    }
}
/*----------------------------------------------------------------------------
 *
 * CTags() tags an input stream assuming standard ANSI C / C++ 2.0 syntax.
 * Long tokens are allowed, ANSI requires only 31 significant, note that if
 * token length exceeds MAX_TOKEN_LENGTH this parser will die a horrible
 * death (or at the very least do ugly things to someone else's memory),
 * with the large size of MAX_TOKEN_LENGTH, anyone caught on this hook
 * deserves what they get...
 *
 * inbuf    is stored in the Buffer state as the input to read from
 * infname  is passed on to every parse routine
 * outbuf   is passed on to every parse routine
 *
 * The Token and Buffer state blocks are allocated here and released again
 * before returning.
 *
 ---------------------------------------------------------------------------*/
CTags(inbuf, infname, outbuf)
char *inbuf;
char *infname;
char *outbuf;
{
    SymbolType type;            /* the type of the current token */
    Token *token;               /* current state variable */
    Buffer *token_buffer;       /* input buffer */
    BOOLEAN token_found;        /* set by CNextToken() */

    /* allocate the Buffer and Token memory, each with the size of its
     * own type (the Buffer was formerly sized as a Token) */
    token = (Token *) malloc(sizeof(Token));
    token_buffer = (Buffer *) malloc(sizeof(Buffer));

    /* init the parser engine, starting at the top of the buffer */
    point = 0;
    CParserInit();
    token->token_count = 0;

    /* init the current token buffers */
    token->cur_token = token->sbuf1;
    token->cur_char_location = &(token->charloc1);
    token->cur_token_line = &(token->tokenline1);
    token->cur_token[0] = '\0';
    *(token->cur_char_location) = 0;
    *(token->cur_token_line) = 1;

    /* init the previous token buffers */
    token->prev_token = token->sbuf2;
    token->prev_char_location = &(token->charloc2);
    token->prev_token_line = &(token->tokenline2);
    token->prev_token[0] = '\0';
    *(token->prev_char_location) = 0;
    *(token->prev_token_line) = 1;

    /* init the input buffers */
    token_buffer->token_line_location = 1;
    token_buffer->inbuf = inbuf;

    /* init Extern state */
    token->extern_active = FALSE;

    /* get the first token */
    token_found = CNextToken(token, token_buffer, infname, outbuf);

    /* loop through the file */
    while (token_found) {

        /* obtain the token type */
        type = CTokenType(token->cur_token);

        /* react on the token type */
        switch (type) {
            case NOP:
                CParseNOP(token, token_buffer, infname, outbuf);
                break;
            case Structure:
            case Enumeration:
            case Union:
                CParseDeclarationStatement(token, token_buffer,
                                           type, infname, outbuf);
                break;
            case TypeDefinition:
                CParseTypeDefinition(token, token_buffer, infname, outbuf);
                break;
            case Class:
                CParseClass(token, token_buffer, infname, outbuf);
                break;
            case Extern:
                token->extern_active = TRUE;
                break;
            default:
                /* not reached */
                break;
        }

        /* the extern flag survives NOP tokens (CParseNOP() clears it
         * itself once a declaration has been handled) and is turned off
         * by every other statement type */
        if (type != Extern &&
            type != NOP) {
            token->extern_active = FALSE;
        }

        /* swap state variables and get the next token */
        CTokenSwap(token);
        token_found = CNextToken(token, token_buffer, infname, outbuf);
    }

    free(token);
    free(token_buffer);
}
/*----------------------------------------------------------------------------
 *
 * tag_suffix_c(), tag_suffix_h() and tag_suffix_e() are procedure names
 * recognized by the tags package in Epsilon; they are called automatically
 * when files with these extensions need to be tagged.  The definitions
 * here replace the routines of the same name defined in tags.e.
 *
 ---------------------------------------------------------------------------*/
tag_suffix_c()
{
    /* The third argument, the output buffer name, is not actually used by
     * anyone; it is kept for a time when this information may be needed.
     * The current algorithm lets the function add_tag() decide which
     * buffer the output is sent to.  By a little more than coincidence,
     * the name passed here is the same one used by add_tag() in tags.e. */
    CTags(bufname, filename, "-tags");
}
/* tag_suffix_h() tags a .h file exactly as tag_suffix_c() tags a .c file */
tag_suffix_h()
{
    tag_suffix_c();
}
/* tag_suffix_e() tags a .e file exactly as tag_suffix_c() tags a .c file */
tag_suffix_e()
{
    tag_suffix_c();
}
/* rebuild the default character maps
 *
 * The case maps are first reset to the identity, then the letter pairs
 * A-Z / a-z and the accented letters of the PC character set are entered
 * with their class and their case partner.
 *
 * The accented letters are written as numeric character codes (IBM code
 * page 437, the same numbering the two range loops below rely on) rather
 * than as character literals, so that the table does not depend on the
 * encoding this source file happens to be stored in. */
when_loading()
{
/* UCLC(up, low) registers `up' as an upper case and `low' as a lower case
 * character and makes each the case partner of the other */
#define UCLC(up, low) def_char_class[low] = C_LOWER, \
        def_char_class[up] = C_UPPER, \
        def_srch_case_map[up] = low, \
        def_case_map[low] = up, \
        def_case_map[up] = low

    int i, j;

    /* every character maps to itself until told otherwise */
    for (i = 0; i < 256; i++)
        def_case_map[i] = def_srch_case_map[i] = i;

    /* the plain letters */
    for (i = 'A', j = 'a'; i <= 'Z'; i++, j++)
        UCLC(i, j);

    /* the accented letters in these two ranges are classed as lower
     * case; the upper case ones among them are corrected by the UCLC()
     * calls that follow */
    for (i = 131; i < 154; i++)
        def_char_class[i] = C_LOWER;
    for (i = 160; i < 164; i++)
        def_char_class[i] = C_LOWER;

    UCLC(128, 135);             /* C cedilla */
    UCLC(142, 132);             /* A diaeresis */
    UCLC(143, 134);             /* A ring */
    UCLC(144, 130);             /* E acute */
    UCLC(146, 145);             /* AE ligature */
    UCLC(153, 148);             /* O diaeresis */
    UCLC(154, 129);             /* U diaeresis */
    UCLC(165, 164);             /* N tilde */
}